library(tidyverse)
library(ggpubr)

# Coordinates drawn as dashed red vertical lines in the plots below:
# i2t_position on the Tajima's D / Fst / pi facets, g2r_position on the
# SweeD likelihood plot.
# NOTE(review): presumably base-pair positions of the two variants of
# interest -- confirm which genome build they refer to.
i2t_position <- 161643798
g2r_position <- 106204113

# generate pop list for vcftools fst calc ------
# Read the sample panel and keep its first three columns; `sample` and
# `super_pop` are the two that are used below.
sub_meta <- read.delim('Archive/covid19/data/integrated_call_samples_v3.20130502.ALL.panel.txt') |>
  select(1:3)

# One row per super-population, with its sample IDs held in a list-column.
pop_meta_list <- sub_meta |>
  group_by(super_pop) |>
  summarize(sample_list = list(sample))

# Write one `<super_pop>.txt` file per group, one sample ID per line.
# The target directory is created first so a fresh checkout does not fail.
pop_list_dir <- 'Archive/covid19/ref/1kgp_meta'
dir.create(pop_list_dir, recursive = TRUE, showWarnings = FALSE)

# walk2() rather than map2(): only the file-writing side effect matters, and
# map2() would return (and auto-print) a list of every written sample vector.
walk2(
  pop_meta_list$sample_list,
  pop_meta_list$super_pop,
  \(samples, pop) write_lines(samples, str_glue('{pop_list_dir}/{pop}.txt'))
)

# 20k windowed pi ---------
# Offset applied to a window edge to obtain the `position` shared by all
# three tables. It is defined once because the tables are later joined on
# `position`, so the three offsets must stay identical.
# NOTE(review): the headers say "20k", but an offset of 1e5 would be the
# half-width of a 200 kb window -- confirm the window/step sizes used when
# the input files were generated.
window_offset <- 1e5

# Nucleotide diversity: position is derived from the window END.
nucdiv <- read_delim('Archive/covid19/data/fcgr2b-it-EAS.windowed.pi') |>
  mutate(position = BIN_END - window_offset) |>
  select(position, PI)

# YRI-EAS fst 20k windowed: position is derived from the window END.
fst <- read_delim('Archive/covid19/data/fcgr2b-it-EAS-YRI.windowed.weir.fst') |>
  mutate(position = BIN_END - window_offset) |>
  select(position, WEIGHTED_FST)

# Tajima's D in EAS: position is derived from the window START instead.
tajima <- read_delim('Archive/covid19/data/fcgr2b-it-EAS.Tajima.D') |>
  mutate(position = BIN_START + window_offset) |>
  select(position, TajimaD)

# Combine the three windowed statistics and draw one stacked facet per
# statistic, each with its own axis scales.
tajima |>
  # Explicit join key: avoids the "Joining with `by = ...`" message and keeps
  # the key fixed even if the tables later gain other shared columns.
  left_join(fst, by = 'position') |>
  left_join(nucdiv, by = 'position') |>
  # Columns are named rather than selected by index so that a change in
  # column order upstream cannot silently pivot the wrong columns.
  pivot_longer(
    c(TajimaD, WEIGHTED_FST, PI),
    names_to = 'stat',
    values_to = 'value'
  ) |>
  ggplot(aes(x = position, y = value)) +
  geom_path() +
  # Dashed red marker at i2t_position; solid baseline at y = 0.
  geom_vline(xintercept = i2t_position, color = 'red', linetype = 'dashed') +
  geom_hline(yintercept = 0) +
  theme_pubr() +
  labs_pubr() +
  facet_wrap(~stat, scales = 'free', ncol = 1)

# CLR from SweeD -------
# Load the SweeD report; `skip = 2` drops its first two lines before parsing.
g2r_sweed <- read_delim(
  'Archive/covid19/data/SweeD_Report.ighg1-ld.txt',
  skip = 2
)

# Likelihood along Position, with a dashed red marker at g2r_position.
ggplot(g2r_sweed, aes(x = Position, y = Likelihood)) +
  geom_path() +
  geom_vline(
    xintercept = g2r_position,
    color = 'red',
    linetype = 'dashed'
  )
